In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [17]:
# Normal distribution X ~ N(100, 10^2): `loc` is the mean, `scale` the standard deviation.
norm_mean, norm_sd = 100, 10

# Cumulative probability P(X < 120), read off the CDF.
p_below_120 = stats.norm.cdf(120, loc=norm_mean, scale=norm_sd)
print("P(X<120) where X ~ N(100, 10^2) = %.3f" % p_below_120)

# Inverse question: the quantile x with P(X < x) = 0.97, from the
# percent-point function (the inverse of the CDF).
x_at_97 = stats.norm.ppf(0.97, loc=norm_mean, scale=norm_sd)
print("x for which P(X < x) = 0.97: %.1f" % x_at_97)
In [18]:
# Student's t distribution with 10 degrees of freedom, shifted to loc=100 and
# stretched by scale=10. Note that for a t distribution `scale` is NOT the
# standard deviation: the standard deviation is scale * sqrt(df / (df - 2)) for df > 2.
t_dof, t_loc, t_scale = 10, 100, 10

# Cumulative probability P(X < 120), read off the CDF.
p_t_below_120 = stats.t.cdf(120, df=t_dof, loc=t_loc, scale=t_scale)
print("P(X<120) where X ~ t with df = 10, loc = 100 and scale = 10 = %.3f" % p_t_below_120)

# Inverse question: the quantile x with P(X < x) = 0.97, from the
# percent-point function (the inverse of the CDF).
x_t_at_97 = stats.t.ppf(0.97, df=t_dof, loc=t_loc, scale=t_scale)
print("x for which P(X < x) = 0.97: %.1f" % x_t_at_97)
Let us first read the data from the CSV file into a Pandas "DataFrame" using Pandas' built-in parsers.
In [2]:
# Parse the comma-separated CO2 / temperature file into a DataFrame
# and echo its plain-text representation.
df = pd.read_csv("co2_temp_yr.csv", sep=",")
print(df)
The DataFrame contains a lot of useful convenience functions. For example, df.describe() gives you a quick summary of useful information! See more at http://pandas.pydata.org/pandas-docs/stable/10min.html.
In [3]:
# Summary statistics of df; as the last expression of the cell, the result is displayed.
df.describe()
Out[3]:
You can also do quick plotting of the data. The results are not the most aesthetically pleasing, but they are useful for a quick visual check of the data.
In [4]:
# Quick pandas plot of "Global Temp" against "CO2 ppm";
# style="o" draws circle markers without connecting lines.
ax = df.plot(
    style="o",
    x="CO2 ppm",
    y="Global Temp",
)
Much nicer results can be obtained using a dedicated plotter like seaborn.
In [5]:
# Seaborn scatter plot of the same two columns with a fitted regression line.
ax = sns.regplot(
    data=df,
    x="CO2 ppm",
    y="Global Temp",
)
In [6]:
# Least-squares linear regression of "Global Temp" on "CO2 ppm".
res = stats.linregress(df["CO2 ppm"], df["Global Temp"])

# Report each fitted quantity on its own line, to three decimals.
for label, value in (
    ("Slope", res.slope),
    ("Intercept", res.intercept),
    ("R", res.rvalue),
    ("Std error", res.stderr),
):
    print("%s = %.3f" % (label, value))
In [7]:
# Load the comma-separated polymer data, using the first column as the row index.
# NOTE: this rebinds `df`, replacing the frame loaded earlier in the notebook.
df = pd.read_csv("polymer.csv", sep=",", index_col=0)
print(df)
Each row in the above data is one group.
In [8]:
# One-way ANOVA across the groups. Each row of df is one group, so the rows of
# the underlying 2-D array are unpacked into separate sample arguments.
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() yields the same array.
print(stats.f_oneway(*df.to_numpy()))
In [ ]:
In [ ]: